/**
 * lilang
 * WordExcelExtractor.java
 * 2013-07-02 12:13:38 PM
 */
package oa.base.util;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;

import org.apache.poi.POIXMLDocument;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.xmlbeans.XmlException;

/**
 * Plain-text extractor for Microsoft Word documents.
 * <p>
 * Despite the class name, the code in this class only handles the Word
 * formats {@code .doc} (binary, via HWPF) and {@code .docx} (OOXML, via XWPF).
 *
 * @author lilang
 */
public class WordExcelExtractor {

	/**
	 * Extracts the text of a {@code .docx} file.
	 *
	 * @param file the document to read; must not be {@code null}
	 * @return the extracted text, or {@code null} if the file could not be
	 *         opened or parsed (the exception is printed to stderr)
	 */
	private static String wordExtractorDocx(File file){
		String result = null;
		OPCPackage opcPackage = null;
		try {
			opcPackage = POIXMLDocument.openPackage(file.getPath());
			POIXMLTextExtractor ex = new XWPFWordExtractor(opcPackage);
			result = ex.getText();
		} catch (IOException e) {
			e.printStackTrace();
		} catch (XmlException e) {
			e.printStackTrace();
		} catch (OpenXML4JException e) {
			e.printStackTrace();
		} finally {
			if (opcPackage != null) {
				// Release the file handle. revert() closes the package WITHOUT
				// saving; close() on a package opened by path may write the
				// content back to the source file, which a read-only text
				// extraction must never do.
				opcPackage.revert();
			}
		}
		return result;
	}

	/**
	 * Extracts the text of a legacy binary {@code .doc} file.
	 *
	 * @param file the document to read; must not be {@code null}
	 * @return the extracted text, or {@code null} if the file could not be
	 *         read (the exception is printed to stderr)
	 */
	private static String wordExtractorDoc(File file){
		String result = null;
		FileInputStream inputStream = null;
		try {
			inputStream = new FileInputStream(file);
			WordExtractor ex = new WordExtractor(inputStream);
			result = ex.getText();
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			// Always release the file handle, even when parsing failed.
			if (inputStream != null) {
				try {
					inputStream.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
		return result;
	}

	/**
	 * Extracts the text of a Word document, choosing the parser from the file
	 * extension (matched case-insensitively).
	 *
	 * @param file the document to parse
	 * @return the extracted text; an empty string if {@code file} is
	 *         {@code null}, does not exist, has no extension, or has an
	 *         extension other than {@code .doc} / {@code .docx}; {@code null}
	 *         if the file was recognised but extraction failed
	 */
	public static String doParser(File file){
		if (file == null || !file.exists()) {
			System.out.println("错误的文件路径！" + (file == null ? null : file.getPath()));
			return "";
		}
		// Look at the file name only, so that a dot in a parent directory
		// name is never mistaken for the start of the extension.
		String name = file.getName();
		int dot = name.lastIndexOf('.');
		if (dot < 0) {
			// No extension at all: previously substring(-1) threw
			// StringIndexOutOfBoundsException here.
			return "";
		}
		String last = name.substring(dot);
		if (".doc".equalsIgnoreCase(last)) {
			return wordExtractorDoc(file);
		}
		if (".docx".equalsIgnoreCase(last)) {
			return wordExtractorDocx(file);
		}
		return "";
	}

	/**
	 * Manual smoke test: prints the text extracted from a document.
	 *
	 * @param args optional; {@code args[0]} is the path of the document to
	 *             parse. When absent, the original sample path is used.
	 */
	public static void main(String[] args) {
		String path = (args != null && args.length > 0)
				? args[0]
				: "D:/tmp/市福彩中心办公系统单一来源采购理由.docx";
		File file = new File(path);
		String rs = WordExcelExtractor.doParser(file);
		System.out.println(rs);
	}

}
